*9_heterogeneity_completer.do*
*created: 6-28-22
*updated: 
*authors: Meredith Welch
*This code runs regression results for mean earnings outcomes
*by completion status for the 2yr and 4yr sample. Modified from 1_prelim_results_earnings_aug2021 and 
*incorporating additional variables indicating degree level from the 1_data_cleaning_july2021.do file. 



/** SECTION 0. SETUP **/

*Project directory globals. Every path below is built from the root folder.
global root           "/srv/tier1/projects/180_major/Majors"
global outdir         "${root}"
global data           "${root}/1_data_cleaning/data"
global output_desc    "${root}/2_data_analysis/desc_stats"
global output_results "${root}/2_data_analysis/results"
global tables         "${output_results}/tables"
global plots          "${output_results}/plots"

*Close any log left open by an earlier run, then start this file's log
capture log close
log using "${output_results}/9_heterogeneity_completer.log", replace

*Session settings
clear all
set more off
set emptycells drop
*NOTE(review): the frame commands later in this file imply Stata 16 or newer, where set matsize is presumably obsolete - confirm
set matsize 11000
*Fixed seeds for the random-number generator and for tie-breaking in sorts
set seed 5468486
set sortseed 335854


*COMBINED SECTOR MAJORS MACRO (field stubs without a sector suffix)
local major "ag com it voc eng bio sci soc bus und"

*CONTROL VARIABLES
*Note: these are the control variables from Andrews, Li, Lovenheim (2014) Table 2
local controls "z_math z_reading rank_90_math rank_7090_math rank_90_reading rank_7090_reading male white hispanic black asian gift atrisk econ_disad"
*Same list separated by comma-space, derived from the list above so the two cannot drift apart; used inside missing()
local controls_comma : subinstr local controls " " ", ", all

*DIFFERENT SECTORS MAJORS MACROS
*Major indicators by sector; the 2yr list carries educ_2yr, which has no 4yr counterpart here
local majors_4yr "ag_4yr com_4yr it_4yr voc_4yr eng_4yr bio_4yr sci_4yr soc_4yr bus_4yr und_4yr"
local majors_2yr "ag_2yr com_2yr it_2yr voc_2yr eng_2yr bio_2yr sci_2yr soc_2yr bus_2yr educ_2yr und_2yr"
*Both sectors together: 4yr indicators first, then 2yr
local majors "`majors_4yr' `majors_2yr'"

*Full regressor lists by sector; in this file they are only used for a word count
*NOTE(review): these lists omit male, and the 2yr list omits educ_2yr - confirm that is intended
local all_2yr "ag_2yr com_2yr it_2yr voc_2yr eng_2yr bio_2yr sci_2yr soc_2yr bus_2yr und_2yr z_math z_reading rank_90_math rank_7090_math rank_90_reading rank_7090_reading white hispanic black asian gift atrisk econ_disad first_earn_year first_earn_qtr last_earn_year last_earn_qtr cohort.1996 cohort.1997 cohort.1998 cohort.1999 cohort.2000 cohort.2001 cohort.2002 cohort.2003 cohort.2004 cohort.2005"
local all_4yr "ag_4yr com_4yr it_4yr voc_4yr eng_4yr bio_4yr sci_4yr soc_4yr bus_4yr und_4yr z_math z_reading rank_90_math rank_7090_math rank_90_reading rank_7090_reading white hispanic black asian gift atrisk econ_disad first_earn_year first_earn_qtr last_earn_year last_earn_qtr cohort.1996 cohort.1997 cohort.1998 cohort.1999 cohort.2000 cohort.2001 cohort.2002 cohort.2003 cohort.2004 cohort.2005"



*REMOVE OUTPUT LEFT BY EARLIER RUNS
*The regression tables in this file are written with outreg2 in append mode,
*so stale copies are deleted first. capture keeps the run going when a file
*does not exist yet. erase is used instead of rm because rm is only available
*in Stata for Mac and Unix.
foreach sector in "4yr" "2yr" {
	foreach ext in "txt" "xml" {
		capture erase "$tables/earnings_all_`sector'_completer.`ext'"
	}
}

*PREPARE FILES
capture erase "$tables/decomp_completer.dta"

*Open the results file: one row per major coefficient, with the outcome
*component, moving-average window, period, three 0/1 specification flags,
*and the coefficient, standard error and sample size.
capture postclose decomposition
postfile decomposition str20(major component ma period) int(controls_cohort hs_cohortfe postsec_cohortfe) float(coeff se n) using "$tables/decomp_completer.dta", replace

/*********************************
******FOUR-YEAR INSTITUTIONS******
**********************************/

*Number of 4yr major indicators, and positional macros 1, 2, ... holding their
*names; the post loops further down read the names back through these positions.
local n_vars : word count `majors_4yr'
local ntot : word count `all_4yr'
tokenize `majors_4yr'

*LOAD DATA COLLAPSED TO ONE OBS PER INDIVIDUAL (with the degree level vars)
use $data/collapsed_data_4yr.dta, clear

*GENERATE LAST COLLEGE-BY-MAJOR INDICATOR FOR CLUSTERING (SAI - 4/24/2023)
*Each major indicator, plus the omitted lib_4yr category, gets a code 1, 2, ...
*in list order. A local counter is used: the earlier version kept the counter
*in a global and then ran a statement that created a global named drop
*instead of removing the counter.
gen majors_cluster = .
local code = 0
foreach field in `majors_4yr' lib_4yr {
	local ++code
	replace majors_cluster = `code' if `field' == 1
}

*Cluster id = institution code times 100 plus major code, stored as double
gen double inst_major = last_col_4yr*100 + majors_cluster
sum inst_major, detail


*ABOUT 700 OBS ARE MISSING A MAJOR - JUST DROP THESE
drop if missing(inst_major)

*CREATE COMPLETION DUMMY VARIABLE
*1 when any of the three degree-level variables equals 2, otherwise 0
*NOTE(review): presumably level 2 codes a bachelor's degree - confirm against the data cleaning file
gen completer_BA = deg_level1 == 2 | deg_level2==2 | deg_level3==2


/*****************************************************
 SECTION 1. MEAN EARNINGS: 5-10, 10-15, 
		and 15-20 YEARS AFTER HS - 4yr 
*****************************************************/

*Mean earnings regressions on the 4yr major indicators, run separately for
*non-completers (n = 0) and completers (n = 1), with three specifications per
*outcome: no controls, controls only, and controls plus cohort-interacted
*fixed effects. Every column is appended to the same outreg2 table.
foreach var of varlist wage_5to10_zero wage_10to15_zero wage_15to20_zero {

	*Recover the year window from the outcome name (wage_<lo>to<hi>_zero) for
	*the table title. The title used to reference two macros that are never
	*defined in this file, so the years were printed as blanks.
	local yr_lo ""
	local yr_hi ""
	if regexm("`var'", "^wage_([0-9]+)to([0-9]+)_") {
		local yr_lo = regexs(1)
		local yr_hi = regexs(2)
	}
	local tbl_title "Mean Earnings Estimates `yr_lo' to `yr_hi' Years after High School"

	*Begin loop over completion status
	forvalues n = 0/1 {

		*Mean of the outcome for this completion group, reported under each column
		*NOTE(review): this mean is taken before the common-sample restriction below, so it can differ from the estimation-sample mean - confirm intended
		sum `var' if completer_BA==`n', detail
		local wage_mean = r(mean)
		preserve

		*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
		drop if missing(`controls_comma', campus, last_col_4yr, cohort) == 1

		*No Controls or FE
		reg `var' `majors_4yr' if completer_BA==`n', vce(cluster inst_major)
		outreg2 using "$tables/earnings_all_4yr_completer", excel se noaster rdec(0) bdec(0) sdec(0) r2 append label ///
			title("`tbl_title'") addstat(Mean Dept var, `wage_mean') addtext(Completer_BA, `n') keep(`majors_4yr')

		*Controls, no FE
		reg `var' `majors_4yr'  `controls' if completer_BA==`n', vce(cluster inst_major)
		outreg2 using "$tables/earnings_all_4yr_completer", excel se noaster rdec(0) bdec(0) sdec(0) r2 append label ///
			title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_BA, `n', Controls, Yes) keep(`majors_4yr')

		*With HS-Cohort, Post-Sec School-Cohort
		reghdfe `var'  `majors_4yr'  `controls' if completer_BA==`n', absorb(i.last_col_4yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
		outreg2 using "$tables/earnings_all_4yr_completer", excel se noaster rdec(0) bdec(0) sdec(0) r2 append label ///
			title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
			addtext(Completer_BA, `n', Post-Sec + HS FE + Controls, Yes)  keep(`majors_4yr')

		restore
	}
}


***STARTING HERE, KEEP ONLY COMPLETERS.
keep if completer_BA==1

/*****************************************************
 SECTION 2: DECOMPOSITION REGRESSIONS  - 4yr
*****************************************************/

*Rename the trend-based coefficient of variation variable to the name the
*coefficient-of-variation loop later in this file refers to
rename coeff_var_wagehat_trend coeff_var_trend_wage

*LINEAR MODEL REGRESSIONS (PERIOD = "")
*For every moving-average window and decomposition component, run the
*specification without controls and the one with controls and fixed effects,
*append both to the outreg2 table, and post each major coefficient to the
*decomposition file.
foreach ma in "wage" "ma_4" "ma_8" {
	foreach component in "alpha" "beta" "wagehat" {

		*Linear model has no period suffix; with period empty the guard below never fires
		local period ""
		if "`ma'" != "ma_4" & "`period'" != "" continue

		local depvar "`component'_`ma'`period'"
		local or2opts "excel se noaster bdec(3) sdec(3) r2 append label"
		*NOTE(review): macros i and j are not defined anywhere in this file, so the title prints with blanks
		local tbl_title "Mean Earnings Estimates `i' to `j' Years after High School"

		*Outcome mean, taken before the sample restriction
		sum `component'_`ma', detail
		local wage_mean = r(mean)

		preserve

		*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
		drop if missing(`controls_comma', campus, last_col_4yr, cohort) == 1

		*full = 0: no controls or FE; full = 1: controls with HS-cohort and post-sec-cohort FE
		forvalues full = 0/1 {
			if `full' == 0 {
				reg `depvar' `majors_4yr'  , vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_4yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_BA, 1) keep(`majors_4yr')
			}
			else {
				reghdfe `depvar'  `majors_4yr'  `controls', absorb(i.last_col_4yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_4yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
					addtext(Completer_BA, 1, Post-Sec + HS FE + Controls, Yes)  keep(`majors_4yr')
			}
			*Positional macros set by tokenize hold the major indicator names
			forvalues k = 1/`n_vars' {
				post decomposition ("``k''") ("`component'") ("`ma'") ("`period'") (`full') (`full') (`full') (_b[``k'']) (_se[``k'']) (e(N))
			}
		}

		restore
	}
}

*REGRESSIONS FOR FULL AND PW-LINEAR MODELING
*Same two specifications as the linear model, but with one outcome per
*component and period (5 through 19), always using the 4-quarter moving average.
foreach component in "alpha" "beta" "wagehat"  {
	forvalues period_num = 5/19 {
		local period = "period`period_num'"
		local ma = "ma_4"

		local depvar "`component'_`ma'_`period'"
		local or2opts "excel se noaster bdec(3) sdec(3) r2 append label"
		*NOTE(review): macros i and j are not defined anywhere in this file, so the title prints with blanks
		local tbl_title "Mean Earnings Estimates `i' to `j' Years after High School"

		*Outcome mean, taken before the sample restriction
		sum `depvar', detail
		local wage_mean = r(mean)

		preserve
		*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
		drop if missing(`controls_comma', campus, last_col_4yr, cohort) == 1

		*full = 0: no controls or FE; full = 1: controls with HS-cohort and post-sec-cohort FE
		forvalues full = 0/1 {
			if `full' == 0 {
				reg `depvar' `majors_4yr'  , vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_4yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_BA, 1) keep(`majors_4yr')
			}
			else {
				reghdfe `depvar'  `majors_4yr'  `controls', absorb(i.last_col_4yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_4yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
					addtext(Completer_BA, 1, Post-Sec + HS FE + Controls, Yes)  keep(`majors_4yr')
			}
			*Positional macros set by tokenize hold the major indicator names
			forvalues k = 1/`n_vars' {
				post decomposition ("``k''") ("`component'") ("`ma'") ("`period'") (`full') (`full') (`full') (_b[``k'']) (_se[``k'']) (e(N))
			}
		}

		restore
	}
}


/*********************************************
SECTION 3: COEFFICIENT OF VARIATION 
	   DIFFERENT MEASURES - 4yr 
***********************************************/ 

*LOOP OVER DIFFERENT WAYS TO MEASURE COEFFICIENT OF VARIATION
*Each measure is regressed on the 4yr major indicators without and with
*controls and fixed effects. Rows are posted with empty ma and period fields.
foreach component in  "coeff_var_trend_wage" "coeff_var_wage_ma_4" "coeff_var_wage_ma_8" {

	local or2opts "excel se noaster bdec(3) sdec(3) r2 append label"
	*NOTE(review): macros i and j are not defined anywhere in this file, so the title prints with blanks
	local tbl_title "Mean Earnings Estimates `i' to `j' Years after High School"

	*Outcome mean, taken before the sample restriction
	sum `component', detail
	local wage_mean = r(mean)
	preserve

	*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
	drop if missing(`controls_comma', campus, last_col_4yr, cohort) == 1

	*full = 0: no controls or FE; full = 1: controls with HS-cohort and post-sec-cohort FE
	forvalues full = 0/1 {
		if `full' == 0 {
			reg `component' `majors_4yr'  , vce(cluster inst_major)
			outreg2 using "$tables/earnings_all_4yr_completer", `or2opts' ///
				title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_BA, 1) keep(`majors_4yr')
		}
		else {
			reghdfe `component' `majors_4yr'  `controls', absorb(i.last_col_4yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
			outreg2 using "$tables/earnings_all_4yr_completer", `or2opts' ///
				title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
				addtext(Completer_BA, 1, Post-Sec + HS FE + Controls, Yes)  keep(`majors_4yr')
		}
		*Positional macros set by tokenize hold the major indicator names
		forvalues k = 1/`n_vars' {
			post decomposition ("``k''") ("`component'") ("") ("") (`full') (`full') (`full') (_b[``k'']) (_se[``k'']) (e(N))
		}
	}

	restore
}



/*********************************
******TWO-YEAR INSTITUTIONS******
**********************************/

*Number of 2yr major indicators, and positional macros 1, 2, ... holding their
*names; the post loops further down read the names back through these positions.
local n_vars : word count `majors_2yr'
local ntot : word count `all_2yr'
tokenize `majors_2yr'

*LOAD DATA COLLAPSED TO ONE OBS PER INDIVIDUAL (with the degree level vars)
use $data/collapsed_data_2yr.dta, clear

*GENERATE LAST COLLEGE-BY-MAJOR INDICATOR FOR CLUSTERING (SAI - 4/24/2023)
*Each major indicator, plus the omitted lib_2yr category, gets a code 1, 2, ...
*in list order. A local counter is used: the earlier version kept the counter
*in a global and then ran a statement that created a global named drop
*instead of removing the counter.
gen majors_cluster = .
local code = 0
foreach field in `majors_2yr' lib_2yr {
	local ++code
	replace majors_cluster = `code' if `field' == 1
}

*Cluster id = institution code times 100 plus major code, stored as double
gen double inst_major = last_col_2yr*100 + majors_cluster
sum inst_major, detail


*ABOUT 20 OBS ARE MISSING A MAJOR - JUST DROP THESE
drop if missing(inst_major)


*CREATE COMPLETION DUMMY VARIABLE
*1 when any of the three degree-level variables equals 1, otherwise 0
*NOTE(review): presumably level 1 codes an associate degree - confirm against the data cleaning file
gen completer_AA = deg_level1 == 1 | deg_level2==1 | deg_level3==1

/*****************************************************
 SECTION 4. MEAN EARNINGS: 5-10, 10-15, 
		and 15-20 YEARS AFTER HS - 2yr 
*****************************************************/

*Mean earnings regressions on the 2yr major indicators, run separately for
*non-completers (n = 0) and completers (n = 1), with three specifications per
*outcome: no controls, controls only, and controls plus cohort-interacted
*fixed effects. Every column is appended to the same outreg2 table.
foreach var of varlist wage_5to10_zero wage_10to15_zero wage_15to20_zero {

	*Recover the year window from the outcome name (wage_<lo>to<hi>_zero) for
	*the table title. The title used to reference two macros that are never
	*defined in this file, so the years were printed as blanks.
	local yr_lo ""
	local yr_hi ""
	if regexm("`var'", "^wage_([0-9]+)to([0-9]+)_") {
		local yr_lo = regexs(1)
		local yr_hi = regexs(2)
	}
	local tbl_title "Mean Earnings Estimates `yr_lo' to `yr_hi' Years after High School"

	*Begin loop over completion status
	forvalues n = 0/1 {

		*Mean of the outcome for this completion group, reported under each column
		*NOTE(review): this mean is taken before the common-sample restriction below, so it can differ from the estimation-sample mean - confirm intended
		sum `var' if completer_AA==`n', detail
		local wage_mean = r(mean)
		preserve

		*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
		drop if missing(`controls_comma', campus, last_col_2yr, cohort) == 1

		*No Controls or FE
		reg `var' `majors_2yr' if completer_AA==`n', vce(cluster inst_major)
		outreg2 using "$tables/earnings_all_2yr_completer", excel se noaster rdec(0) bdec(0) sdec(0) r2 append label ///
			title("`tbl_title'") addstat(Mean Dept var, `wage_mean') addtext(Completer_AA, `n') keep(`majors_2yr')

		*Controls, no FE
		reg `var' `majors_2yr'  `controls' if completer_AA==`n', vce(cluster inst_major)
		outreg2 using "$tables/earnings_all_2yr_completer", excel se noaster rdec(0) bdec(0) sdec(0) r2 append label ///
			title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_AA, `n', Controls, Yes) keep(`majors_2yr')

		*With HS-Cohort, Post-Sec School-Cohort
		reghdfe `var'  `majors_2yr'  `controls' if completer_AA==`n', absorb(i.last_col_2yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
		outreg2 using "$tables/earnings_all_2yr_completer", excel se noaster rdec(0) bdec(0) sdec(0) r2 append label ///
			title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
			addtext(Completer_AA, `n', Post-Sec + HS FE + Controls, Yes)  keep(`majors_2yr')

		restore
	}
}


***STARTING HERE, KEEP ONLY COMPLETERS.
keep if completer_AA==1

/*****************************************************
 SECTION 5: DECOMPOSITION REGRESSIONS  - 2yr 
*****************************************************/

*Rename the trend-based coefficient of variation variable to the name the
*coefficient-of-variation loop later in this file refers to
rename coeff_var_wagehat_trend coeff_var_trend_wage

*LINEAR MODEL REGRESSIONS (PERIOD = "")
*For every moving-average window and decomposition component, run the
*specification without controls and the one with controls and fixed effects,
*append both to the outreg2 table, and post each major coefficient to the
*decomposition file.
foreach ma in "wage" "ma_4" "ma_8" {
	foreach component in "alpha" "beta" "wagehat" {

		*Linear model has no period suffix; with period empty the guard below never fires
		local period ""
		if "`ma'" != "ma_4" & "`period'" != "" continue

		local depvar "`component'_`ma'`period'"
		local or2opts "excel se noaster bdec(3) sdec(3) r2 append label"
		*NOTE(review): macros i and j are not defined anywhere in this file, so the title prints with blanks
		local tbl_title "Mean Earnings Estimates `i' to `j' Years after High School"

		*Outcome mean, taken before the sample restriction
		sum `component'_`ma', detail
		local wage_mean = r(mean)

		preserve

		*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
		drop if missing(`controls_comma', campus, last_col_2yr, cohort) == 1

		*full = 0: no controls or FE; full = 1: controls with HS-cohort and post-sec-cohort FE
		forvalues full = 0/1 {
			if `full' == 0 {
				reg `depvar' `majors_2yr'  , vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_2yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_AA, 1) keep(`majors_2yr')
			}
			else {
				reghdfe `depvar'  `majors_2yr'  `controls', absorb(i.last_col_2yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_2yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
					addtext(Completer_AA, 1, Post-Sec + HS FE + Controls, Yes)  keep(`majors_2yr')
			}
			*Positional macros set by tokenize hold the major indicator names
			forvalues k = 1/`n_vars' {
				post decomposition ("``k''") ("`component'") ("`ma'") ("`period'") (`full') (`full') (`full') (_b[``k'']) (_se[``k'']) (e(N))
			}
		}

		restore
	}
}

*REGRESSIONS FOR FULL AND PW-LINEAR MODELING
*Same two specifications as the linear model, but with one outcome per
*component and period (5 through 19), always using the 4-quarter moving average.
foreach component in "alpha" "beta" "wagehat" {
	forvalues period_num = 5/19 {
		local period = "period`period_num'"
		local ma = "ma_4"

		local depvar "`component'_`ma'_`period'"
		local or2opts "excel se noaster bdec(3) sdec(3) r2 append label"
		*NOTE(review): macros i and j are not defined anywhere in this file, so the title prints with blanks
		local tbl_title "Mean Earnings Estimates `i' to `j' Years after High School"

		*Outcome mean, taken before the sample restriction
		sum `depvar', detail
		local wage_mean = r(mean)

		preserve
		*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
		drop if missing(`controls_comma', campus, last_col_2yr, cohort) == 1

		*full = 0: no controls or FE; full = 1: controls with HS-cohort and post-sec-cohort FE
		forvalues full = 0/1 {
			if `full' == 0 {
				reg `depvar' `majors_2yr'  , vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_2yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_AA, 1) keep(`majors_2yr')
			}
			else {
				reghdfe `depvar'  `majors_2yr'  `controls', absorb(i.last_col_2yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
				outreg2 using "$tables/earnings_all_2yr_completer", `or2opts' ///
					title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
					addtext(Completer_AA, 1, Post-Sec + HS FE + Controls, Yes)  keep(`majors_2yr')
			}
			*Positional macros set by tokenize hold the major indicator names
			forvalues k = 1/`n_vars' {
				post decomposition ("``k''") ("`component'") ("`ma'") ("`period'") (`full') (`full') (`full') (_b[``k'']) (_se[``k'']) (e(N))
			}
		}

		restore
	}
}


/*********************************************
SECTION 6: COEFFICIENT OF VARIATION 
	   DIFFERENT MEASURES - 2yr 
***********************************************/ 

*LOOP OVER DIFFERENT WAYS TO MEASURE COEFFICIENT OF VARIATION
*Each measure is regressed on the 2yr major indicators without and with
*controls and fixed effects. Rows are posted with empty ma and period fields.
foreach component in "coeff_var_trend_wage" "coeff_var_wage_ma_4" "coeff_var_wage_ma_8" {

	local or2opts "excel se noaster bdec(3) sdec(3) r2 append label"
	*NOTE(review): macros i and j are not defined anywhere in this file, so the title prints with blanks
	local tbl_title "Mean Earnings Estimates `i' to `j' Years after High School"

	*Outcome mean, taken before the sample restriction
	sum `component', detail
	local wage_mean = r(mean)
	preserve

	*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
	drop if missing(`controls_comma', campus, last_col_2yr, cohort) == 1

	*full = 0: no controls or FE; full = 1: controls with HS-cohort and post-sec-cohort FE
	forvalues full = 0/1 {
		if `full' == 0 {
			reg `component' `majors_2yr'  , vce(cluster inst_major)
			outreg2 using "$tables/earnings_all_2yr_completer", `or2opts' ///
				title("`tbl_title'") addstat(Mean Dept var, `wage_mean')  addtext(Completer_AA, 1) keep(`majors_2yr')
		}
		else {
			reghdfe `component' `majors_2yr'  `controls', absorb(i.last_col_2yr#i.cohort i.campus#i.cohort) vce(cluster inst_major)
			outreg2 using "$tables/earnings_all_2yr_completer", `or2opts' ///
				title("`tbl_title'") addstat(Mean Dept var, `wage_mean') ///
				addtext(Completer_AA, 1, Post-Sec + HS FE + Controls, Yes)  keep(`majors_2yr')
		}
		*Positional macros set by tokenize hold the major indicator names
		forvalues k = 1/`n_vars' {
			post decomposition ("``k''") ("`component'") ("") ("") (`full') (`full') (`full') (_b[``k'']) (_se[``k'']) (e(N))
		}
	}

	restore
}

*Close decomposition file so the posted rows are written to disk
postclose decomposition



/******************************************************
SECTION 7 : FIGURES SHOWING EVOLUTION OF WAGES BY FIELD
	   LINEAR MODEL FOLLOWED BY PIECEWISE 
	   - 4yr and 2yr
*******************************************************/

*Set frames: one for the linear-model figures, one for the piecewise figures
frame create graphing1
frame create graphing2

*** LINEAR MODEL (RESTRICT TO MODELS WITH ALL CONTROLS AND FE)

frame change graphing1
use "$tables/decomp_completer.dta", clear

*Keep the fully specified models that have no period, i.e. the linear model rows
keep if controls_cohort == 1 & hs_cohortfe == 1 & postsec_cohortfe == 1 & period == ""

*One row per major and moving-average window, with a coeff, se and n column per component
local idvars "major ma controls_cohort hs_cohortfe postsec_cohortfe"
reshape wide coeff se n, i(`idvars') j(component) string

*Fifteen copies of each row, numbered as years 6 through 20
expand 15
bysort `idvars': egen year = seq(), from(6) to(20)

*Predicted gap at each year from the intercept and slope estimates
gen quarter = 4*year
gen predicted_wage_return = coeffalpha + quarter*coeffbeta


*LINEAR-MODEL FIGURES, 4-YEAR SECTOR THEN 2-YEAR SECTOR
*One figure per sector and moving-average window, with one connected line per
*major. The two sector loops were identical apart from the sector suffix and
*the title wording, so they are merged into a single loop over sector.
foreach sector in "4yr" "2yr" {

	if "`sector'" == "4yr" local sector_label "4-Year"
	if "`sector'" == "2yr" local sector_label "2-Year"

	foreach ma in "wage" "ma_2" "ma_4" "ma_8" {

		if "`ma'" == "wage" local graphsub = "Estimations Use Quarterly Earnings"
		if "`ma'" == "ma_2" local graphsub = "Estimations Use 2 Quarter Moving Average Earnings"
		if "`ma'" == "ma_4" local graphsub = "Estimations Use 4 Quarter Moving Average Earnings"
		if "`ma'" == "ma_8" local graphsub = "Estimations Use 8 Quarter Moving Average Earnings"

		*Skip windows with nothing to plot. The regressions in this file post
		*wage, ma_4 and ma_8 only, so ma_2 currently has no rows and would
		*otherwise produce a figure with no data.
		quietly count if ma == "`ma'" & strpos(major, "_`sector'") > 0 & !missing(predicted_wage_return)
		if r(N) == 0 {
			display as text "No decomposition estimates for ma = `ma', sector = `sector'; figure skipped"
			continue
		}

		twoway ///
			(connected predicted_wage_return year if major == "ag_`sector'"  & ma == "`ma'", msize(small) msymbol(O))  ///
			(connected predicted_wage_return year if major == "bio_`sector'" & ma == "`ma'", msize(small) msymbol(D))  ///
			(connected predicted_wage_return year if major == "bus_`sector'" & ma == "`ma'", msize(small) msymbol(T))  ///
			(connected predicted_wage_return year if major == "com_`sector'" & ma == "`ma'", msize(small) msymbol(S))  ///
			(connected predicted_wage_return year if major == "eng_`sector'" & ma == "`ma'", msize(small) msymbol(+))  ///
			(connected predicted_wage_return year if major == "it_`sector'"  & ma == "`ma'", msize(small) msymbol(X))  ///
			(connected predicted_wage_return year if major == "sci_`sector'" & ma == "`ma'", msize(small) msymbol(Oh)) ///
			(connected predicted_wage_return year if major == "soc_`sector'" & ma == "`ma'", msize(small) msymbol(Dh)) ///
			(connected predicted_wage_return year if major == "voc_`sector'" & ma == "`ma'", msize(small) msymbol(Sh)) ///
			, ///
			legend(order(1 "Agri" 2 "Bio" 3 "Bus/Econ" 4 "Com" 5 "Eng" 6 "IT" 7 "Sci" 8 "Soc Sci" 9 "Voc") position(6) rows(2) region(lcolor(black) lpattern(solid))) ///
			ytitle("Predicted Quarterly Earnings Gap", size(small)) ///
			ylabel(, labsize(small) angle(45)) ///
			xtitle("Years After HS Graduation") ///
			title("Evolution of Predicted Earnings Over Career Cycle" "by `sector_label' Major Relative to Lib Arts & Humanities") ///
			subtitle("`graphsub'") ///
			graphregion(color(white)) ///
			saving("$plots/decomp_completer_`ma'_`sector'.gph", replace)

		graph export "$plots/decomp_completer_`ma'_`sector'.pdf", as(pdf) replace
	}
}


*** PIECEWISE LINEAR - FIGS 5 AND 6.

frame change graphing2
use "$tables/decomp_completer.dta", clear

*Keep the fully specified models that carry a period, i.e. the piecewise rows
keep if controls_cohort == 1 & hs_cohortfe == 1 & postsec_cohortfe == 1 & period != ""

*Fold component and period into one key so the reshape yields one column per pair
gen component_period = component + period
drop component period
local idvars "major ma controls_cohort hs_cohortfe postsec_cohortfe"
reshape wide coeff se n, i(`idvars') j(component_period) string

*Fourteen copies of each row, numbered as years 6 through 19
expand 14
bysort `idvars': egen year = seq(), from(6) to(19)
gen quarter = 4*year

*Predicted gap for each year from that year's own intercept and slope
*NOTE(review): period 5 is estimated above but year 5 is never created here, so the period 5 estimates are not plotted - confirm intended
gen predicted_wage_return = .
forvalues yr = 6/19 {
	replace predicted_wage_return = coeffalphaperiod`yr' + coeffbetaperiod`yr'*quarter if year == `yr'
}

*PIECEWISE FIGURES, 4-YEAR SECTOR THEN 2-YEAR SECTOR
*One figure per sector using the 4-quarter moving average, with one line per
*major. The two sector blocks were copies of each other, so they are merged
*into a single loop. The 4-year copy also listed a tenth legend key with an
*undefined label although only nine lines are drawn; both sectors now use the
*nine-key legend.

*Legend labels, in plot order
local major1name "Agri"
local major2name "Bio"
local major3name "Bus/Econ"
local major4name "Comm"
local major5name "Eng"
local major6name "IT"
local major7name "Sci"
local major8name "SocSci"
local major9name "Voc"

*Line colours, in plot order
local color1 "navy"
local color2 "brown"
local color3 "cranberry"
local color4 "maroon"
local color5 "dkgreen"
local color6 "lavender"
local color7 "forest_green"
local color8 "sand"
local color9 "orange_red"

foreach sector in "4yr" "2yr" {

	*Piecewise estimates exist for the 4-quarter moving average only
	local ma "ma_4"

	twoway ///
		(connected predicted_wage_return year if major == "ag_`sector'"  & ma == "`ma'", color(`color1') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "bio_`sector'" & ma == "`ma'", color(`color2') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "bus_`sector'" & ma == "`ma'", color(`color3') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "com_`sector'" & ma == "`ma'", color(`color4') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "eng_`sector'" & ma == "`ma'", color(`color5') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "it_`sector'"  & ma == "`ma'", color(`color6') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "sci_`sector'" & ma == "`ma'", color(`color7') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "soc_`sector'" & ma == "`ma'", color(`color8') msymbol(none) lpattern(solid)) ///
		(connected predicted_wage_return year if major == "voc_`sector'" & ma == "`ma'", color(`color9') msymbol(none) lpattern(solid)) ///
		, ///
		subtitle("Estimations Use 4-Quarter Moving Average Earnings", size(medsmall)) ///
		xtitle("Years After HS Graduation") ///
		ylabel(,labsize(small) angle(45)) yline(0) ///
		ytitle("Quarterly Earnings Relative to Liberal Arts") ///
		graphregion(color(white)) ///
		legend(order(1 "`major1name'" 2 "`major2name'" 3 "`major3name'" 4 "`major4name'" 5 "`major5name'" 6 "`major6name'" 7 "`major7name'" 8 "`major8name'" 9 "`major9name'") rows(2) size(small) position(6) region(lcolor(black) lpattern(solid))) ///
		saving("$plots/decomp_completer_pw`sector'.gph", replace)

	graph export "$plots/decomp_completer_pw`sector'.pdf", as(pdf) replace
}

log close


/** END **/ 

